This is the FOURTH script used to generate the main figures for the the manuscript titled “A spatial gradient of bacterial diversity in the human oral cavity shaped by salivary flow”

This script and associated data are provided via (c) by Diana M Proctor, Julia A. Fukuyama, Susan P. Holmes. These data and the associated script are licensed under the Creative Commons Attribution-ShareAlike 4.0 International License (CC-BY-CA).

Given attribution, you are free to: 1) Share, copy and redistribute the material in any medium or format 2) Adapt, remix, transform, and build upon the material for any purpose, even commercially.

To see the full license associated with attribution of this work, see the CC-By-CA license, see http://creativecommons.org/licenses/by-sa/4.0/.

Import the data (from the m.s.)

In this document, we perform the following spatial analysis on hellinger-transformed data: - trend surface analysis with forward selection of the polynomial terms - PCNM with selection of significant PCNM variables using a single distance threshold - MEM - testing of multiple distance thresholds and models and selection of the best model with AIC

library("phyloseq");library("ggplot2");library(gridExtra);library("stringr");library("reshape2");library("genefilter"); library(knitr);library(DESeq2)
setwd("~/Desktop/Proctor_NatureComm/")
supra = readRDS("~/Dropbox/Figures_20170617/Revised/supra_v2.0.RDS")

load the R scripts that were downloaded for the PCNM package from the sedar github

setwd("~/Desktop/PCNM/sedar-master/pkg/PCNM/R/")
for (f in list.files(pattern="*.R")) {
    source(f)
}

1. Look at the map of the sample locations

library(RColorBrewer)
myPalette = colorRampPalette(brewer.pal(11, "RdBu"), space="Lab")

#create validation datset #2 for time x space analysis, all teeth, healthy subjects
keep <- c("1-101", "1-102", "1-103", "1-104", "1-105", "1-106", "1-107")
FullMouth <- subset_samples(supra, Subject %in% keep & Protocol=="Clinic")
FullMouth <- prune_taxa(taxa_sums(FullMouth) > 0, FullMouth)
FullMouth
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 469 taxa and 1701 samples ]
## sample_data() Sample Data:       [ 1701 samples by 42 sample variables ]
## tax_table()   Taxonomy Table:    [ 469 taxa by 10 taxonomic ranks ]
#filter the data
filtergroup = filterfun(kOverA(k=200, A=1)) #k = number of samples; A = abundance
        f1 = filter_taxa(FullMouth, filtergroup, prune=TRUE) 
        f1 = prune_taxa(taxa_sums(f1) > 0, f1)
        f1 = prune_samples(sample_sums(f1) > 0, f1)
        f1
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 117 taxa and 1701 samples ]
## sample_data() Sample Data:       [ 1701 samples by 42 sample variables ]
## tax_table()   Taxonomy Table:    [ 117 taxa by 10 taxonomic ranks ]
otus = data.frame(otu_table(f1))
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-4
otus.h = decostand(otus, "hellinger")
#get the x,y coordinates        
map <- data.frame(sample_data(f1))
map$x = as.numeric(as.character(map$x))
map$y = as.numeric(as.character(map$y))
xygrid <- cbind(map$x, map$y)
xygrid.c <- scale(xygrid, center=TRUE, scale=FALSE)
#generate the plot of the sample locations using ggplot
ggplot(map, aes(x, y, color=Tooth_Class, shape=Tooth_Aspect)) + geom_point() + theme_bw()

PCNM

PCNM

1. do QuickPCNM

setwd("~/Desktop/PCNM/raw/")
#perform PCNM on the hellinger transformed data
fm.PCNM.quick <- quickPCNM(otus.h, xygrid)
## Loading required package: ade4
## 
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
## 
##     score
## The following object is masked from 'package:IRanges':
## 
##     score
## The following object is masked from 'package:BiocGenerics':
## 
##     score
## Truncation level = 0.5763064
## Loading required package: AEM
## Loading required package: spdep
## Loading required package: sp
## 
## Attaching package: 'sp'
## The following object is masked from 'package:IRanges':
## 
##     %over%
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
## 
## Attaching package: 'spdep'
## The following object is masked from 'package:ade4':
## 
##     mstree
## Loading required package: boot
## 
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
## 
##     melanoma
## Time to compute PCNMs = 1044.521000  sec
## Loading required package: packfor
## packfor: R Package for Forward Selection (Canoco Manual p.49)
## version0.0-8

## Testing variable 1
## Testing variable 2
## Testing variable 3
## Testing variable 4
## Testing variable 5
## Testing variable 6
## Testing variable 7
## Testing variable 8
## Testing variable 9
## Procedure stopped (adjR2thresh criteria) adjR2cum = 0.027358 with 9 variables (superior to 0.027069)
## 
## -------------------------------------------------------
## A significant linear trend has been found in the response data.
## The data have been detrended prior to PCNM analysis.
## -------------------------------------------------------
## The truncation value used for PCNM building is 0.5763064 
## 24  PCNM eigenvectors have been produced 
## Adjusted R2 of global model =  0.0271 
## 8  PCNM variables have been selected 
## R2 of minimum model =  0.0314                      
## Adjusted R2 of minimum model =  0.0268          
## ---------------------------------------------------------
## Time to compute quickPCNM = 1390.397000  sec
summary(fm.PCNM.quick)
##                    Length Class      Mode   
## PCNM               24     data.frame list   
## PCNM_eigenvalues   24     -none-     numeric
## fwd.sel             7     data.frame list   
## PCNM_reduced_model  8     data.frame list   
## RDA                12     rda        list   
## RDA_test            4     anova.cca  list   
## RDA_axes_tests      4     anova.cca  list
fm.PCNM.quick[[2]] #eigenvalues
##  [1] 1181.21171  848.67940  778.73057  499.13964  426.55003  190.62618
##  [7]  159.32015  128.21193  113.91230  107.60752  105.23425   85.80224
## [13]   81.56902   72.46114   69.77880   64.27327   55.14665   50.93297
## [19]   49.20153   47.80905   42.04516   37.02682   32.02251   28.05140
fm.PCNM.quick[[3]] #results of forward selection
##   variables order          R2      R2Cum   AdjR2Cum         F  pval
## 1        V4     4 0.010911581 0.01091158 0.01032942 18.743294 0.001
## 2       V20    20 0.006419316 0.01733090 0.01617345 11.092237 0.001
## 3       V17    17 0.003457780 0.02078868 0.01905760  5.992427 0.001
## 4       V23    23 0.002534252 0.02332293 0.02101944  4.400729 0.001
## 5       V18    18 0.002426297 0.02574922 0.02287533  4.221267 0.001
## 6        V5     5 0.002360013 0.02810924 0.02466689  4.113489 0.001
## 7       V14    14 0.002069397 0.03017864 0.02616874  3.612510 0.001
## 8        V6     6 0.001189615 0.03136825 0.02678843  2.078013 0.019
fw.res= data.frame(fm.PCNM.quick[[3]])
sig = fw.res$variables

#test the axes for significance
fm.PCNM.quick$RDA_axes_tests
## Permutation test for rda under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = Y.det ~ V4 + V5 + V6 + V14 + V17 + V18 + V20 + V23, data = PCNMred)
##            Df Variance       F Pr(>F)    
## RDA1        1  0.00749 30.3973  0.001 ***
## RDA2        1  0.00356 14.4561  0.001 ***
## RDA3        1  0.00107  4.3479  0.011 *  
## RDA4        1  0.00049  1.9771  0.431    
## RDA5        1  0.00037  1.4893           
## RDA6        1  0.00028  1.1513           
## RDA7        1  0.00017  0.6812           
## RDA8        1  0.00007  0.2938           
## Residual 1692  0.41676                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#subset on the sig variables and then plot
sig.df= data.frame(fm.PCNM.quick$PCNM)
sigdata <- sig.df[sig]
head(sigdata)
##           V4        V20         V17         V23         V18         V5
## 1  0.7293754 0.13429034 0.395989925 -0.03108152 -0.44002924 -0.6289961
## 2  0.6826409 0.09663508 0.109407928  0.10244640  0.05557921 -0.6316427
## 3  0.4310809 0.27630372 0.002260752  0.01232607 -0.13301414 -0.4937597
## 4  0.1281021 0.21644545 0.010984117  0.01083507  0.19278022 -0.3047346
## 5 -0.0249256 0.02934535 0.032670839 -0.01250219 -0.01882887 -0.2138190
## 6 -0.7588141 0.08937326 0.009710169 -0.03170470  0.09479388  0.3693355
##          V14         V6
## 1 -0.1730998  0.6603423
## 2 -0.1014922  0.5426773
## 3  0.1803336  0.2232134
## 4 -0.1109037 -0.2093396
## 5  0.1899693 -0.1372993
## 6 -0.2440033 -0.3870874
#what is the adjusted R2
(R2adj.pcnm <- RsquareAdj(fm.PCNM.quick$RDA)$adj.r.squared)
## [1] 0.02678843
#project the fitted model
df = data.frame(fm.PCNM.quick$RDA$CCA$u, sample_data(f1))
df$x = as.numeric(as.character(df$x))
df$y = as.numeric(as.character(df$y))

p1 = ggplot(df, aes(x, y, color=RDA1)) +ggtitle("RDA1") + geom_point(aes(x=x, y=y)) + coord_fixed() +scale_x_continuous(limits=c(-1,1)) + geom_jitter(position=position_jitter(width=0.045, height = 0.045), size=2) + scale_color_gradientn(colours=myPalette(100)) + theme_bw() +theme(text = element_text(size=12), axis.text.x = element_text(angle=0, vjust=1))

p2 = ggplot(df, aes(x, y, color=RDA2)) +ggtitle("RDA2") + geom_point(aes(x=x, y=y)) + coord_fixed() +scale_x_continuous(limits=c(-1,1)) + geom_jitter(position=position_jitter(width=0.045, height = 0.045), size=2) + scale_color_gradientn(colours=myPalette(100)) + theme_bw() +theme(text = element_text(size=12), axis.text.x = element_text(angle=0, vjust=1))


p3 = ggplot(df, aes(x, y, color=RDA3)) +ggtitle("RDA3") + geom_point(aes(x=x, y=y)) + coord_fixed() +scale_x_continuous(limits=c(-1,1)) + geom_jitter(position=position_jitter(width=0.025, height = 0.025), size=2) + scale_color_gradientn(colours=myPalette(100)) + theme_bw()+theme(text = element_text(size=12), axis.text.x = element_text(angle=0, vjust=1))



#
df_axes = df


#ggsave(grid.arrange(p1, p2,  p3, ncol=1), file="~/Dropbox/Figures_20170617/Revised/FinalFigures/FigureS8b.png", device="png",  width = 11, height = 8.5, units ="in",dpi = 300)

plot the samples scores; the first 4 axes were significant so let’s plot all of them

df = fm.PCNM.quick$RDA$CCA$wa[,1:4]
samples = data.frame(df, sample_data(f1))
samples$x = as.numeric(as.character(samples$x))
samples$y = as.numeric(as.character(samples$y))
mi = subset(samples, Tooth_Class %in% c("Molar", "Incisor_Central"))

buccal = subset(mi, Tooth_Aspect=="Buccal")
#subset and plot the buccal
p1= ggplot(buccal, aes(RDA1, RDA2, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p2= ggplot(buccal, aes(RDA1, RDA3, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p3= ggplot(buccal, aes(RDA1, RDA4, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p4= ggplot(buccal, aes(RDA1, RDA3, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p5= ggplot(buccal, aes(RDA2, RDA3, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p6= ggplot(buccal, aes(RDA2, RDA4, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

#print to screen
grid.arrange(p1, p2, p3, p4, p5, p6, ncol=1)

subset and plot the lingual

lingual = subset(mi, Tooth_Aspect=="Lingual")

p1= ggplot(lingual, aes(RDA1, RDA2, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p2= ggplot(lingual, aes(RDA1, RDA3, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p3= ggplot(lingual, aes(RDA1, RDA4, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p4= ggplot(lingual, aes(RDA1, RDA3, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p5= ggplot(lingual, aes(RDA2, RDA3, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

p6= ggplot(lingual, aes(RDA2, RDA4, color=Tooth_Class)) + geom_point()+theme(legend.position = "none") + facet_wrap(Jaw_Quadrant~Tooth_Aspect, ncol=4) + theme_bw()

grid.arrange(p1, p2, p3, p4, p5, p6, ncol=3)

2. Plot the PCNM variables so that it’s possible to see at which scale the variation appears to occur

#get data into a frame for plotting
sigdata2 = data.frame(sigdata, sample_data(f1))
sigdata2$x = as.numeric(as.character(sigdata2$x))
sigdata2$y = as.numeric(as.character(sigdata2$y))


dfm = melt(sigdata2, id.vars = colnames(sample_data(f1)))
p4 = ggplot(dfm, aes(x, y, color=value))  + geom_point(aes(x=x, y=y)) + coord_fixed() +scale_x_continuous(limits=c(-1,1)) + geom_jitter(position=position_jitter(width=0.045, height = 0.045), size=2) + scale_color_gradientn(colours=myPalette(100)) + theme_bw() + facet_wrap(~variable, scales="free")+ theme(text = element_text(size=12), axis.text.x = element_text(angle=00, vjust=1))


#ggsave(p4, file="~/Dropbox/Figures_20170617/Revised/FinalFigures/FigureS8b.png", device="png",  width = 11, height = 8.5, units ="in",dpi = 300)

MEM

MEM

  • Note: the code here is from the Vignette: Stephane Dray, 2008. Moran’s eigenvectors of spatial weighting of matrices in R.

  • The analysis above raises two questions related to the question or how a neighborhood should be defined since an arbitrary cutoff is used to define what is near and what is not. MEM offers a flexible way of solving this problem by allowing for construction of multiple models each differing in their definition of neighbor. Different model selection parameters can then be used to choose the model that explains the most variance

Generate the neighbor graph
map = data.frame(sample_data(f1))
map <- map[order(map$Tooth_Number),] 

nbear1 <- dnearneigh(xygrid, 0, 0.3)
plot(nbear1, xygrid, col="red", pch=20, cex=2)

#compute the euclidean distances between sites and select for neighbors
dist_nbear1 <- nbdists(nbear1, xygrid)
str(dist_nbear1)
## List of 1701
##  $ : num [1:125] 0.151 0.292 0.262 0.279 0.151 ...
##  $ : num [1:151] 0.151 0.142 0.285 0.233 0.279 ...
##  $ : num [1:208] 0.292 0.142 0.145 0.289 0.244 ...
##  $ : num [1:182] 0.285 0.145 0.145 0.186 0.162 ...
##  $ : num [1:208] 0.289 0.145 0.221 0.271 0.178 ...
##  $ : num [1:182] 0.221 0.176 0.292 0.191 0.117 ...
##  $ : num [1:156] 0.176 0.21 0.207 0.148 0.249 ...
##  $ : num [1:155] 0.21 0.199 0.242 0.122 0.217 ...
##  $ : num [1:182] 0.199 0.202 0.205 0.136 0.169 ...
##  $ : num [1:207] 0.202 0.181 0.29 0.248 0.178 ...
##  $ : num [1:181] 0.181 0.109 0.274 0.27 0.22 ...
##  $ : num [1:176] 0.29 0.109 0.166 0.28 0.212 ...
##  $ : num [1:182] 0.274 0.166 0.151 0.284 0.23 ...
##  $ : num [1:94] 0.151 0.287 0.249 0.151 0.287 ...
##  $ : num [1:96] 0.172 0.196 0.279 0.172 0.196 ...
##  $ : num [1:157] 0.172 0.198 0.253 0.204 0.29 ...
##  $ : num [1:178] 0.198 0.14 0.29 0.23 0.189 ...
##  $ : num [1:182] 0.14 0.15 0.29 0.199 0.143 ...
##  $ : num [1:237] 0.29 0.15 0.146 0.295 0.176 ...
##  $ : num [1:207] 0.29 0.146 0.166 0.256 0.147 ...
##  $ : num [1:249] 0.166 0.14 0.272 0.256 0.147 ...
##  $ : num [1:218] 0.14 0.134 0.268 0.189 0.116 ...
##  $ : num [1:214] 0.2718 0.1337 0.1806 0.1662 0.0903 ...
##  $ : num [1:177] 0.181 0.166 0.213 0.159 0.166 ...
##  $ : num [1:151] 0.166 0.166 0.239 0.152 0.202 ...
##  $ : num [1:145] 0.166 0.14 0.26 0.203 0.228 ...
##  $ : num [1:151] 0.14 0.188 0.288 0.225 0.284 ...
##  $ : num [1:58] 0.188 0.255 0.188 0.255 0.188 ...
##  $ : num [1:63] 0.262 0.16 0.262 0.16 0.262 ...
##  $ : num [1:181] 0.279 0.233 0.244 0.16 0.151 ...
##  $ : num [1:209] 0.279 0.204 0.186 0.271 0.151 ...
##  $ : num [1:245] 0.257 0.162 0.178 0.292 0.268 ...
##  $ : num [1:207] 0.18 0.101 0.191 0.224 0.109 ...
##  $ : num [1:182] 0.236 0.117 0.207 0.225 0.157 ...
##  $ : num [1:188] 0.197 0.148 0.242 0.289 0.133 ...
##  $ : num [1:187] 0.249 0.122 0.205 0.202 0.141 ...
##  $ : num [1:182] 0.217 0.1362 0.2479 0.1408 0.0983 ...
##  $ : num [1:207] 0.1685 0.1781 0.2701 0.2388 0.0983 ...
##  $ : num [1:243] 0.265 0.207 0.22 0.28 0.188 ...
##  $ : num [1:207] 0.28 0.203 0.212 0.284 0.219 ...
##  $ : num [1:182] 0.266 0.23 0.287 0.277 0.162 ...
##  $ : num [1:93] 0.285 0.249 0.161 0.285 0.249 ...
##  $ : num [1:95] 0.196 0.253 0.181 0.196 0.253 ...
##  $ : num [1:183] 0.279 0.204 0.23 0.181 0.152 ...
##  $ : num [1:209] 0.29 0.189 0.199 0.295 0.152 ...
##  $ : num [1:275] 0.225 0.143 0.176 0.256 0.28 ...
##  $ : num [1:238] 0.188 0.116 0.147 0.256 0.238 ...
##  $ : num [1:244] 0.195 0.114 0.147 0.268 0.211 ...
##  $ : num [1:276] 0.2714 0.1596 0.0873 0.1888 0.2853 ...
##  $ : num [1:244] 0.17 0.116 0.166 0.236 0.16 ...
##  $ : num [1:244] 0.2574 0.1453 0.0903 0.2134 0.2631 ...
##  $ : num [1:240] 0.237 0.141 0.159 0.239 0.189 ...
##  $ : num [1:276] 0.243 0.166 0.152 0.26 0.295 ...
##  $ : num [1:208] 0.295 0.202 0.203 0.288 0.239 ...
##  $ : num [1:146] 0.228 0.225 0.295 0.156 0.196 ...
##  $ : num [1:93] 0.284 0.255 0.196 0.284 0.255 ...
##  $ : num [1:125] 0.151 0.292 0.262 0.279 0.151 ...
##  $ : num [1:151] 0.151 0.142 0.285 0.233 0.279 ...
##  $ : num [1:208] 0.292 0.142 0.145 0.289 0.244 ...
##  $ : num [1:182] 0.285 0.145 0.145 0.186 0.162 ...
##  $ : num [1:208] 0.289 0.145 0.221 0.271 0.178 ...
##  $ : num [1:182] 0.221 0.176 0.292 0.191 0.117 ...
##  $ : num [1:156] 0.176 0.21 0.207 0.148 0.249 ...
##  $ : num [1:155] 0.21 0.199 0.242 0.122 0.217 ...
##  $ : num [1:182] 0.199 0.202 0.205 0.136 0.169 ...
##  $ : num [1:207] 0.202 0.181 0.29 0.248 0.178 ...
##  $ : num [1:181] 0.181 0.109 0.274 0.27 0.22 ...
##  $ : num [1:176] 0.29 0.109 0.166 0.28 0.212 ...
##  $ : num [1:182] 0.274 0.166 0.151 0.284 0.23 ...
##  $ : num [1:94] 0.151 0.287 0.249 0.151 0.287 ...
##  $ : num [1:96] 0.172 0.196 0.279 0.172 0.196 ...
##  $ : num [1:157] 0.172 0.198 0.253 0.204 0.29 ...
##  $ : num [1:178] 0.198 0.14 0.29 0.23 0.189 ...
##  $ : num [1:182] 0.14 0.15 0.29 0.199 0.143 ...
##  $ : num [1:237] 0.29 0.15 0.146 0.295 0.176 ...
##  $ : num [1:207] 0.29 0.146 0.166 0.256 0.147 ...
##  $ : num [1:249] 0.166 0.14 0.272 0.256 0.147 ...
##  $ : num [1:218] 0.14 0.134 0.268 0.189 0.116 ...
##  $ : num [1:214] 0.2718 0.1337 0.1806 0.1662 0.0903 ...
##  $ : num [1:177] 0.181 0.166 0.213 0.159 0.166 ...
##  $ : num [1:151] 0.166 0.166 0.239 0.152 0.202 ...
##  $ : num [1:145] 0.166 0.14 0.26 0.203 0.228 ...
##  $ : num [1:151] 0.14 0.188 0.288 0.225 0.284 ...
##  $ : num [1:58] 0.188 0.255 0.188 0.255 0.188 ...
##  $ : num [1:63] 0.262 0.16 0.262 0.16 0.262 ...
##  $ : num [1:181] 0.279 0.233 0.244 0.16 0.151 ...
##  $ : num [1:209] 0.279 0.204 0.186 0.271 0.151 ...
##  $ : num [1:245] 0.257 0.162 0.178 0.292 0.268 ...
##  $ : num [1:207] 0.18 0.101 0.191 0.224 0.109 ...
##  $ : num [1:182] 0.236 0.117 0.207 0.225 0.157 ...
##  $ : num [1:188] 0.197 0.148 0.242 0.289 0.133 ...
##  $ : num [1:187] 0.249 0.122 0.205 0.202 0.141 ...
##  $ : num [1:182] 0.217 0.1362 0.2479 0.1408 0.0983 ...
##  $ : num [1:207] 0.1685 0.1781 0.2701 0.2388 0.0983 ...
##  $ : num [1:243] 0.265 0.207 0.22 0.28 0.188 ...
##  $ : num [1:207] 0.28 0.203 0.212 0.284 0.219 ...
##  $ : num [1:182] 0.266 0.23 0.287 0.277 0.162 ...
##  $ : num [1:93] 0.285 0.249 0.161 0.285 0.249 ...
##  $ : num [1:95] 0.196 0.253 0.181 0.196 0.253 ...
##   [list output truncated]
##  - attr(*, "class")= chr "nbdist"
##  - attr(*, "call")= language nbdists(nb = nbear1, coords = xygrid)
#define weights as a function of distance
fdist <- lapply(dist_nbear1, function(x) 1-x/max(dist(xygrid)))

#create the spatial weights
listw_nbear1 = nb2listw(nbear1, glist=fdist, style="B")
listw_nbear1
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 1701 
## Number of nonzero links: 313536 
## Percentage nonzero weights: 10.83624 
## Average number of links: 184.3245 
## 
## Weights style: B 
## Weights constants summary:
##      n      nn       S0       S1        S2
## B 1701 2893401 282596.9 509941.9 202662372

Select the spatial weighting matrix

 #fau <- sqrt(otus/outer(apply(otus, 1, sum), rep(1, ncol(otus)), "*"))
 #detrend
 faudt <- resid(lm(as.matrix(otus.h) ~ as.matrix(xygrid)))
library(spacemakeR)
## Loading required package: tripack
## 
## Attaching package: 'spacemakeR'
## The following object is masked from 'package:vegan':
## 
##     pcnm
 sc.nbear1 = scores.listw(listw_nbear1)
 AIC.nbear1 = ortho.AIC(faudt, sc.nbear1$vectors)
 AIC.nbear1
##  [1] -1447.169 -1458.091 -1467.897 -1475.638 -1480.692 -1485.114 -1489.108
##  [8] -1492.451 -1495.650 -1498.256 -1500.354 -1502.419 -1504.491 -1506.469
## [15] -1508.375 -1509.638 -1510.904 -1511.728 -1512.343 -1512.760 -1512.825
## [22] -1512.694 -1512.470 -1512.171 -1511.836 -1511.431 -1511.022 -1510.514
## [29] -1509.978 -1509.372 -1508.634 -1507.840 -1507.030 -1506.189 -1505.336
## [36] -1504.468 -1503.590 -1502.697 -1501.681 -1500.582 -1499.460 -1498.307
## [43] -1497.091 -1495.810 -1494.485 -1493.154 -1491.795 -1490.411 -1488.941
## [50] -1487.452 -1485.934 -1484.371 -1482.800 -1481.081 -1479.296
 #get the min AIC
 min(AIC.nbear1, na.rm=TRUE)
## [1] -1512.825
 which.min(AIC.nbear1)
## [1] 21
 #test.W takes 2 arguments (response matrix, object of class nb); it returns the best model
 nbear1.res = test.W(faudt, nbear1)
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##       AICc NbVar
## 1 -1512.22    20
 names(nbear1.res)
## [1] "all"  "best"
 names(nbear1.res$best)
## [1] "values"  "vectors" "call"    "AICc"    "AICc0"   "ord"     "R2"
 #estimate the best values of the parameters
  f2 <- function(x, dmax, y) {
     1 - (x^y)/(dmax)^y
 }
 maxi <- max(unlist(nbdists(nbear1, as.matrix(xygrid))))
 
 tri.f2 <- test.W(faudt, nbear1, f = f2, y = 2:10, dmax = maxi,
xy = xygrid)
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##   y      dmax      AICc NbVar
## 7 8 0.2949818 -1518.904    19
 names(tri.f2$best)
## [1] "values"  "vectors" "call"    "AICc"    "AICc0"   "ord"     "R2"
 myspec = variogmultiv(faudt, xygrid, nclass=20)
 myspec
## $d
##  [1] 0.05082135 0.15246406 0.25410677 0.35574948 0.45739219 0.55903489
##  [7] 0.66067760 0.76232031 0.86396302 0.96560573 1.06724843 1.16889114
## [13] 1.27053385 1.37217656 1.47381927 1.57546197 1.67710468 1.77874739
## [19] 1.88039010 1.98203281
## 
## $var
##  [1] 0.4392591 0.4303664 0.4276838 0.4442615 0.4428409 0.4492127 0.4431978
##  [8] 0.4377135 0.4171286 0.4117325 0.4111460 0.4055835 0.4111726 0.4164044
## [15] 0.4259216 0.4281296 0.4314636 0.4420018 0.4670504 0.4529522
## 
## $n.c
##  [1]  338 1701 1701 1701 1669 1701 1701 1701 1670 1670 1452 1421 1513 1701
## [15] 1701 1579 1484 1236 1002  406
## 
## $n.w
##  [1]   6665  77457  76175  71119  65896  62236  66440  64272  68295  77503
## [11]  59549  71617  61501  85808  96749 101314 110758  94416  75645  27344
## 
## $dclass
##  [1] "(0,0.102]"     "(0.102,0.203]" "(0.203,0.305]" "(0.305,0.407]"
##  [5] "(0.407,0.508]" "(0.508,0.61]"  "(0.61,0.711]"  "(0.711,0.813]"
##  [9] "(0.813,0.915]" "(0.915,1.02]"  "(1.02,1.12]"   "(1.12,1.22]"  
## [13] "(1.22,1.32]"   "(1.32,1.42]"   "(1.42,1.52]"   "(1.52,1.63]"  
## [17] "(1.63,1.73]"   "(1.73,1.83]"   "(1.83,1.93]"   "(1.93,2.03]"
 plot(myspec$d, myspec$var, ty="b", pch=20, xlab="Distance", ylab=("C(distance"))

#construct 20 neighborhood matrices with a distance criterion varuying along the sequence of 20 evenly distributed values between 0.2 and 2; then use this to pick the threshold of the best model for comparison with the PCNM and the trend surface analysis

#create 20 different models at differing thresholds
dxy = seq(give.thresh(dist(xygrid)), 5, le=20)
nbdnnlist <- lapply(dxy, dnearneigh, x = xygrid, d1 = 0)

#test the best model across this list of 20 different models
dmn.bin = lapply(nbdnnlist, test.W, Y=faudt)
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1507.957    24
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##       AICc NbVar
## 1 -1504.55    25
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1509.805    20
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1503.528    27
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1504.248    28
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1507.485    25
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1502.409    28
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
## 
## 
## AICc for the null model: -1433.56740998287 
## 
## Best spatial model:
##        AICc NbVar
## 1 -1497.335    32
length(dmn.bin)
## [1] 20
#for each nb we can find the lowest AIC
minAIC = sapply(dmn.bin, function(x) min(x$best$AICc, na.rm = T))
which.min(minAIC)
## [1] 3
dxy[which.min(minAIC)]
## [1] 1.041957

Extract the best model

MEM.champ = unlist(dmn.bin[which.min(minAIC)], recursive = FALSE)
summary(MEM.champ)
##      Length Class      Mode
## all  2      data.frame list
## best 7      -none-     list
MEM.champ$best$values #eigenvalues
##  [1]  685.365877  269.026806   75.692792   61.689549   31.603670
##  [6]   29.830306   28.357151   22.040975   17.362966   16.025082
## [11]   14.576507    8.063459    3.701469    1.772453   -8.077360
## [16]  -14.441614  -16.021188  -20.965509  -24.007124  -27.138497
## [21]  -27.983765  -28.191872  -28.868765  -29.505130  -30.228715
## [26]  -31.000000  -31.000000  -31.000000  -31.000000  -31.000000
## [31]  -31.000000  -31.000000  -31.000000  -31.000000  -31.492534
## [36]  -31.507173  -31.827568  -34.447225  -37.657836  -43.593100
## [41]  -45.621357  -48.611129  -56.514115  -64.978928  -65.864046
## [46]  -68.785035  -75.224520  -75.870788  -79.005358  -84.334437
## [51]  -94.834022 -110.081811 -121.688324 -129.242235 -166.326906
MEM.champ$best$ord #MEM variables by order of R2
##  [1]  4  6 51 42 29 43 36 35 40 38 55 13 50 10 31 34 49  9 27 32 30 21 33
## [24] 39 24 52 53 11 48 14 22 26 17  8 47 16 44  3 23 37  5 18 54 20 28 41
## [47] 46  7 25 12 15 45 19  1  2
#MEM variables selected in the best model
mem_ID = MEM.champ$best$ord[1:which.min(MEM.champ$best$AICc)]
length(mem_ID)
## [1] 20
sort(mem_ID)
##  [1]  4  6  9 10 13 27 29 31 32 34 35 36 38 40 42 43 49 50 51 55
MEM.all <- MEM.champ$best$vectors
MEM.select <- MEM.champ$best$vectors[,sort(c(mem_ID))]
colnames(MEM.select) <- sort(mem_ID)

#unadjusted of the best model
R2.membest = MEM.champ$best$R2[which.min(MEM.champ$best$AICc)]

#adjusted of the best model
RsquareAdj(R2.membest, nrow(otus.h), length(mem_ID))
## [1] 0.05523814
df = data.frame(MEM.select, sample_data(f1))
df$x = as.numeric(as.character(df$x))
df$y = as.numeric(as.character(df$y))
dfm = melt(df, id.vars = colnames(sample_data(f1)))

p1 = ggplot(dfm, aes(x, y, color=value)) + geom_point(aes(x=x, y=y)) + coord_fixed() +scale_x_continuous(limits=c(-1,1)) + geom_jitter(position=position_jitter(width=0.025, height = 0.025), size=3) + scale_color_gradientn(colours=myPalette(100)) + theme_bw() + facet_wrap(~variable)+ theme(text = element_text(size=14), axis.text.x = element_text(angle=90, vjust=1)) 
p1

#ggsave(p1, file="~/Dropbox/Figures_20170617/Revised/FinalFigures/FigureS9b.png",  width = 8.5, height = 11, units ="in",dpi = 300, device="png")

Let’s do an RDA using the 10 retained MEM variables

library(vegan)
fm.mem.rda = rda(otus.h~., as.data.frame(MEM.select))
fm.MEM.r2a = RsquareAdj(fm.mem.rda)$adj.r.squared
fm.MEM.r2a
## [1] 0.05397551
anova.cca(fm.mem.rda)
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = otus.h ~ `4` + `6` + `9` + `10` + `13` + `27` + `29` + `31` + `32` + `34` + `35` + `36` + `38` + `40` + `42` + `43` + `49` + `50` + `51` + `55`, data = as.data.frame(MEM.select))
##            Df Variance      F Pr(>F)    
## Model      20  0.02822 5.8497  0.001 ***
## Residual 1680  0.40524                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#how many axes are significant
axes.mem.test = anova.cca(fm.mem.rda, by="axis")
axes.mem.test
## Permutation test for rda under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = otus.h ~ `4` + `6` + `9` + `10` + `13` + `27` + `29` + `31` + `32` + `34` + `35` + `36` + `38` + `40` + `42` + `43` + `49` + `50` + `51` + `55`, data = as.data.frame(MEM.select))
##            Df Variance       F Pr(>F)    
## RDA1        1  0.01473 61.0629  0.001 ***
## RDA2        1  0.00523 21.6888  0.001 ***
## RDA3        1  0.00336 13.9279  0.001 ***
## RDA4        1  0.00140  5.7833  0.067 .  
## RDA5        1  0.00078  3.2149  0.821    
## RDA6        1  0.00062  2.5696  0.964    
## RDA7        1  0.00055  2.2809  0.981    
## RDA8        1  0.00029  1.2146  1.000    
## RDA9        1  0.00024  1.0031  1.000    
## RDA10       1  0.00019  0.7795  1.000    
## RDA11       1  0.00017  0.7251  1.000    
## RDA12       1  0.00015  0.6157  1.000    
## RDA13       1  0.00012  0.4793  1.000    
## RDA14       1  0.00011  0.4608  1.000    
## RDA15       1  0.00007  0.3025  1.000    
## RDA16       1  0.00007  0.2721  1.000    
## RDA17       1  0.00005  0.2164  1.000    
## RDA18       1  0.00004  0.1790  1.000    
## RDA19       1  0.00003  0.1224  1.000    
## RDA20       1  0.00002  0.0951  1.000    
## Residual 1680  0.40524                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#how many terms are significant
terms.mem.test = anova.cca(fm.mem.rda, by="terms")
terms.mem.test
## Permutation test for rda under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = otus.h ~ `4` + `6` + `9` + `10` + `13` + `27` + `29` + `31` + `32` + `34` + `35` + `36` + `38` + `40` + `42` + `43` + `49` + `50` + `51` + `55`, data = as.data.frame(MEM.select))
##            Df Variance       F Pr(>F)    
## `4`         1  0.00468 19.4207  0.001 ***
## `6`         1  0.00318 13.1931  0.001 ***
## `9`         1  0.00050  2.0813  0.018 *  
## `10`        1  0.00056  2.3230  0.005 ** 
## `13`        1  0.00064  2.6559  0.004 ** 
## `27`        1  0.00049  2.0496  0.020 *  
## `29`        1  0.00238  9.8863  0.001 ***
## `31`        1  0.00060  2.4703  0.010 ** 
## `32`        1  0.00048  1.9927  0.021 *  
## `34`        1  0.00055  2.2796  0.007 ** 
## `35`        1  0.00134  5.5757  0.001 ***
## `36`        1  0.00170  7.0407  0.001 ***
## `38`        1  0.00084  3.4939  0.003 ** 
## `40`        1  0.00100  4.1479  0.001 ***
## `42`        1  0.00267 11.0828  0.001 ***
## `43`        1  0.00180  7.4455  0.001 ***
## `49`        1  0.00054  2.2538  0.018 *  
## `50`        1  0.00061  2.5418  0.005 ** 
## `51`        1  0.00280 11.5966  0.001 ***
## `55`        1  0.00084  3.4623  0.001 ***
## Residual 1680  0.40524                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
nb.axes = length(which(axes.mem.test[,4] <=0.05))
#get the fitted model
axes = data.frame(fm.mem.rda$CCA$u[,1:7], sample_data(f1))



#plot with ggplot
dfm = melt(axes, id.var=colnames(sample_data(f1)))
dfm$x = as.numeric(as.character(dfm$x))
dfm$y = as.numeric(as.character(dfm$y))


dfm = subset(dfm, variable %in% c("RDA1", "RDA2", "RDA3"))

#plot the fitted model
p1 = ggplot(dfm, aes(x, y, color=value)) + geom_point(aes(x=x, y=y)) + coord_fixed() +scale_x_continuous(limits=c(-1,1)) + geom_jitter(position=position_jitter(width=0.025, height = 0.025), size=3) + scale_color_gradientn(colours=myPalette(100)) + theme_bw() + facet_wrap(~variable, ncol=1)+ theme(text = element_text(size=14), axis.text.x = element_text(angle=0, vjust=1)) 
p1

#ggsave(p1, file="~/Dropbox/Figures_20170617/Revised/FinalFigures/FigureS9a.png",  width = 8.5, height = 11, units ="in",dpi = 300, device="png")

plot the sample scores as a function of tooth class

axes = data.frame(fm.mem.rda$CCA$wa[,1:5], sample_data(f1))
dfm = melt(axes, id.var=colnames(sample_data(f1)))
dfm$x = as.numeric(as.character(dfm$x))
dfm$y = as.numeric(as.character(dfm$y))


buccal = subset(dfm, Tooth_Aspect=="Buccal")
lingual = subset(dfm, Tooth_Aspect=="Lingual")
ordering = 1:32
buccal$interval <- factor(buccal$Tooth_Number, as.character(ordering))
lingual$interval <- factor(lingual$Tooth_Number, as.character(ordering))
     
p1 = ggplot(buccal, aes(as.numeric(Tooth_Number), value)) + geom_smooth()+ facet_wrap(~variable, ncol=7, scales="free") + ggtitle("Buccal")
p2 = ggplot(lingual, aes(as.numeric(Tooth_Number), value)) + geom_smooth()+ facet_wrap(~variable, ncol=7, scales="free")+ ggtitle("Lingual")

grid.arrange(p1, p2, ncol=1)
## `geom_smooth()` using method = 'loess'
## `geom_smooth()` using method = 'loess'